2024 Daiichi Dxd

Report: 30342

## load required packages
library(Seurat)
library(cowplot)
library(dplyr)
library(ggplot2)
library(DT)
library(paletteer)
library(forcats)


Patient: 30342

  • DC subset
    • Cluster7 (res 0.2)
dir= "~/Desktop/DF/DFCI_Paweletz/2024_Daiichi_DXD/"
obj.srt = readRDS(paste0(dir,"rds/P30342.24.05.07.rds"))

Dendritic Cells population

# Subset
obj.tmp = subset(obj.srt, RNA_snn_res.0.2 == 7)

Dendritic Cells Subset (UMAP)

DimPlot(obj.tmp, group.by = "orig.ident", pt.size = 0.2) + theme_bw() +
  xlim(c(11,13)) +ylim(c(-5,2)) +ggtitle("Cluster7:Dendritic Cells")

orig.ident_fils = paletteer::scale_fill_paletteer_d("ggsci::nrc_npg") 

Number of Dendritic Cells

res = "orig.ident"
obj.tmp@meta.data %>% ggplot(aes(!!sym(res), fill=!!sym(res))) + 
  geom_bar(alpha=0.7, color="grey5", size=0.1) +
  geom_text(stat="count", aes(label= ..count..), vjust=-0.5, size=3) +
  orig.ident_fils + 
  xlab("") + 
  theme_classic() +
  theme(legend.title = element_blank(),
        axis.text.x = element_text(angle = 45, hjust=1)) 

Fraction of Dendritic Cells

res = "orig.ident"
df = obj.srt@meta.data %>% dplyr::select(!!sym(res),RNA_snn_res.0.2)
df_all = df %>% dplyr::select(orig.ident) %>% table() %>% as.data.frame()  
df_7= df %>% filter(RNA_snn_res.0.2 == 7) %>% dplyr::select(orig.ident) %>% table() %>% as.data.frame()  

df_all$DC= df_7$Freq
df_all$DC_fraction= df_7$Freq/df_all$Freq *100
colnames(df_all)[2] = "Number_of_all_cells"

orig.ident_fils = paletteer::scale_fill_paletteer_d("ggsci::nrc_npg") 

ggplot(df_all, aes(x = orig.ident, y = DC_fraction, fill = orig.ident)) +
  geom_col(alpha = 0.7, color = "grey5", size = 0.1) +
  orig.ident_fils +
  geom_text(aes(label = round(DC_fraction, 2)), vjust = -0.5, size = 3) +  # 소수점 둘째자리까지 반올림하여 표시
  theme_classic() +
  theme(legend.title = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  ylab("Dendritic Cell Fraction in the sample") +
  xlab("")

DEG Analysis overview

Combination Table

Combination Comparison 1 Comparison 2
1 P30342_M P30342_D
2 P30342_M P30342_I
3 P30342_M P30342_A
4 P30342_D P30342_I
5 P30342_D P30342_A
6 P30342_I P30342_A

List

  1. P30342_M vs P30342_D
  2. P30342_M vs P30342_I
  3. P30342_M vs P30342_A
  4. P30342_D vs P30342_I
  5. P30342_D vs P30342_A
  6. P30342_I vs P30342_A

Comparison 1

  • P30342_M vs P30342_D
  • M is control

Volcano plot

# Define funtion 
#id1 = "treatment"
#id2 = "control"
#logfc = 0
deg.two.groups = function(obj.srt = obj.srt, treatment, control,logfc=0){
  Idents(obj.srt) = 'orig.ident'
  markers <-FindMarkers(
    obj.srt,
    logfc.threshold = logfc,
    ident.1 = treatment,
    ident.2 = control, slot= "data")
}
deg = deg.two.groups(obj.srt = obj.tmp,
                    treatment = "P30342_D", 
                    control = "P30342_M")
deg1 =deg 
t= paste0(paste0("P30342_", "D"), "/",paste0("P30342_", "M"))
deg %>% ggplot(aes(avg_log2FC, -log10(p_val))) + 
  geom_point(size=0.1) +
  geom_vline(xintercept = 0, size=0.1) +
  theme_bw() + ggtitle(t)

Volcano plot with DEG information

deg = deg %>% mutate(DE= ifelse(p_val <= 0.05 & avg_log2FC >= log2(1.2), "UP",
                                ifelse(p_val <= 0.05 & avg_log2FC <= -log2(1.2), "DN",
                                "no significant")))
deg$DE = factor(deg$DE, levels = c("UP","DN","no significant"))
up = nrow(deg[deg$DE == "UP", ])
dn = nrow(deg[deg$DE == "DN", ])
deg %>% ggplot(aes(avg_log2FC, -log10(p_val), color=DE)) + 
  geom_point(size=0.5, shape=19, alpha=0.7) +
  geom_vline(xintercept = c(-log2(1.2), log2(1.2)), size=0.1, color="grey88") +
  geom_hline(yintercept = -log10(0.05), size=0.1) +
  scale_color_manual(values = c("red3","royalblue3","grey"), guide = FALSE) +
  theme_bw() +
  annotate("text", x = Inf, y = Inf, label = paste0("UP: ", up), 
           hjust = 1.1, vjust = 2, size = 5, color = "red") +
  annotate("text", x = -Inf, y = Inf, label = paste0("DN: ", dn), 
           hjust = -0.1, vjust = 2, size = 5, color = "royalblue") +
  theme_bw() + ggtitle(t)

significance : p value <= 0.05
FC >= 1.2

DEG table

deg %>% DT::datatable(extensions = "Buttons", options = list(dom="Bfrtip", buttons=c("csv","excel"), pageLength=10))
# GSEA related functions 
library(clusterProfiler)

perform_GSEA <- function(res, ref, pvalueCutoff = 1) {
  ranking <- function(res) {
    # Check the name of log2fc related 
    if ("avg_log2FC" %in% names(res)) {
      df <- res$avg_log2FC
    } else if ("log2FoldChange" %in% names(res)) {
      df <- res$log2FoldChange
    } else {
      stop("Neither avg_log2FC nor log2FoldChange columns found in the data frame.")
    }
    names(df) <- rownames(res)
    df <- sort(df, decreasing = TRUE)
    return(df)
  }
  
  ranked.res <- ranking(res)
  
  x <- clusterProfiler::GSEA(geneList = ranked.res,
                             TERM2GENE = ref,
                             pvalueCutoff = pvalueCutoff,
                             pAdjustMethod = "BH",
                             verbose = TRUE,
                             seed = TRUE)
  
  result <- x@result %>% arrange(desc(NES))
  result <- result[, c('NES', 'pvalue', 'p.adjust', 'core_enrichment', 'ID')]
  return(result)
}

# GSEA Plot 
gsea_nes_plot <- function(gsea.res, title, color="pvalue") {
  gsea.res = gsea.res %>% mutate(sig=ifelse(pvalue <= 0.05, "p value <= 0.05", "p value > 0.05"))
  # basic plot
  p <- gsea.res %>%
    ggplot(aes(reorder(ID, NES), NES)) +
    geom_col(aes(fill=!!sym(color)), color="grey5", size=0.15, alpha=0.8) +
    coord_flip() +
    labs(x="Pathway", y="Normalized Enrichment Score", title="GSEA") +
    theme_classic() +
    theme(axis.text.x = element_text(size=5, face = 'bold'),
          axis.text.y = element_text(size=6, face = 'bold'),
          axis.title = element_text(size=10)) +
    ggtitle(title)
  
  # color by color input type
  if (color == "pvalue") {
    p <- p + scale_fill_gradient(low = 'orangered', high = '#E5E7E9')
  } else if (color == "sig") {
    p <- p + scale_fill_manual(values = c("orangered", "#E5E7E9"))
  }
  return(p)
}
# Pathway selection : HALLMARK 
hallmark <- msigdbr::msigdbr(species = "Homo sapiens", category = "H") %>% 
  dplyr::select(gs_name, gene_symbol) 
gsea.res = perform_GSEA(res = deg, ref = hallmark, pvalueCutoff = 1)

GSEA plot colored by p-value

gsea_nes_plot(gsea.res = gsea.res, title = t, color = "pvalue") 

GSEA plot colored by significance

gsea_nes_plot(gsea.res = gsea.res, title = t, color = "sig")

GSEA plot2 for the selected pathways

perform_GSEA2 <- function(res, ref, pvalueCutoff = 1) {
  ranking <- function(res) {
    df <- res$avg_log2FC
    names(df) <- rownames(res)
    df <- sort(df, decreasing = TRUE)
    return(df)
  }
  
  ranked.res <- ranking(res)
  
  # Initialize .Random.seed object
  set.seed(NULL)
  
  x <- clusterProfiler::GSEA(geneList = ranked.res,
                             TERM2GENE = ref,
                             pvalueCutoff = pvalueCutoff,
                             pAdjustMethod = "BH",
                             verbose = TRUE,
                             seed = TRUE)
  
  
  return(x)
}

# Application 
gsea.res.raw = perform_GSEA2(res = deg1, ref = hallmark) 

HALLMARK_INTERFERON_GAMMA_RESPONSE

id =gsea.res.raw@result$ID[grepl("GAMMA",gsea.res.raw@result$ID)]

# Draw the gseasplot2 
# enrichplot::gseaplot2(gsea.res.raw, geneSetID = id, title = id)
df =enrichplot::gseaplot2(gsea.res.raw, geneSetID = id, title = id)
print(df)

Top 30 leading genes

# Leading genes N
n= 30
# df[[2]][1]$dat %>% as.data.frame() %>% filter(position ==1) %>% head(n) %>% dplyr::select(gene) %>% pull()

df2 = df[[2]][1]$dat %>% as.data.frame() %>% filter(position ==1) %>% head(n) %>% 
  dplyr::select(gene,geneList) 
colnames(df2)[2] = "log2FC"

df2 %>% DT::datatable()

HALLMARK_APOPTOSIS

id =gsea.res.raw@result$ID[grepl("APOP",gsea.res.raw@result$ID)]

# Draw the gseasplot2 
# enrichplot::gseaplot2(gsea.res.raw, geneSetID = id, title = id)
df =enrichplot::gseaplot2(gsea.res.raw, geneSetID = id, title = id)
print(df)

Top 30 leading genes

# Leading genes N
n= 30
# df[[2]][1]$dat %>% as.data.frame() %>% filter(position ==1) %>% head(n) %>% dplyr::select(gene) %>% pull()

df2 = df[[2]][1]$dat %>% as.data.frame() %>% filter(position ==1) %>% head(n) %>% 
  dplyr::select(gene,geneList) 
colnames(df2)[2] = "log2FC"

df2 %>% DT::datatable()

HALLMARK_TNFA_SIGNALING_VIA_NFKB

id =gsea.res.raw@result$ID[grepl("TNFA",gsea.res.raw@result$ID)]

# Draw the gseasplot2 
# enrichplot::gseaplot2(gsea.res.raw, geneSetID = id, title = id)
df =enrichplot::gseaplot2(gsea.res.raw, geneSetID = id, title = id)
print(df)

Top 30 leading genes

# Leading genes N
n= 30
# df[[2]][1]$dat %>% as.data.frame() %>% filter(position ==1) %>% head(n) %>% dplyr::select(gene) %>% pull()

df2 = df[[2]][1]$dat %>% as.data.frame() %>% filter(position ==1) %>% head(n) %>% 
  dplyr::select(gene,geneList) 
colnames(df2)[2] = "log2FC"

df2 %>% DT::datatable()

HALLMARK_INFLAMMATORY_RESPONSE

id =gsea.res.raw@result$ID[grepl("INFLAM",gsea.res.raw@result$ID)]

# Draw the gseasplot2 
# enrichplot::gseaplot2(gsea.res.raw, geneSetID = id, title = id)
df =enrichplot::gseaplot2(gsea.res.raw, geneSetID = id, title = id)
print(df)

Top 30 leading genes

# Leading genes N
n= 30
# df[[2]][1]$dat %>% as.data.frame() %>% filter(position ==1) %>% head(n) %>% dplyr::select(gene) %>% pull()

df2 = df[[2]][1]$dat %>% as.data.frame() %>% filter(position ==1) %>% head(n) %>% 
  dplyr::select(gene,geneList) 
colnames(df2)[2] = "log2FC"

df2 %>% DT::datatable()

Comparison 2

  • P30342_M vs P30342_I
  • M is control

Volcano plot

deg = deg.two.groups(obj.srt = obj.tmp,
                    treatment = "P30342_I", 
                    control = "P30342_M")
deg2 =deg 
t= paste0(paste0("P30342_", "I"), "/",paste0("P30342_", "M"))
deg %>% ggplot(aes(avg_log2FC, -log10(p_val))) + 
  geom_point(size=0.1) +
  geom_vline(xintercept = 0, size=0.1) +
  theme_bw() + ggtitle(t)

Volcano plot with DEG information

deg = deg %>% mutate(DE= ifelse(p_val <= 0.05 & avg_log2FC >= log2(1.2), "UP",
                                ifelse(p_val <= 0.05 & avg_log2FC <= -log2(1.2), "DN",
                                "no significant")))
deg$DE = factor(deg$DE, levels = c("UP","DN","no significant"))
up = nrow(deg[deg$DE == "UP", ])
dn = nrow(deg[deg$DE == "DN", ])
deg %>% ggplot(aes(avg_log2FC, -log10(p_val), color=DE)) + 
  geom_point(size=0.5, shape=19, alpha=0.7) +
  geom_vline(xintercept = c(-log2(1.2), log2(1.2)), size=0.1, color="grey88") +
  geom_hline(yintercept = -log10(0.05), size=0.1) +
  scale_color_manual(values = c("red3","royalblue3","grey"), guide = FALSE) +
  theme_bw() +
  annotate("text", x = Inf, y = Inf, label = paste0("UP: ", up), 
           hjust = 1.1, vjust = 2, size = 5, color = "red") +
  annotate("text", x = -Inf, y = Inf, label = paste0("DN: ", dn), 
           hjust = -0.1, vjust = 2, size = 5, color = "royalblue") +
  theme_bw() + ggtitle(t)

significance : p value <= 0.05
FC >= 1.2

DEG table

deg %>% DT::datatable(extensions = "Buttons", options = list(dom="Bfrtip", buttons=c("csv","excel"), pageLength=10))
gsea.res = perform_GSEA(res = deg, ref = hallmark, pvalueCutoff = 1)

GSEA plot colored by p-value

gsea_nes_plot(gsea.res = gsea.res, title = t, color = "pvalue") 

GSEA plot colored by significance

gsea_nes_plot(gsea.res = gsea.res, title = t, color = "sig")

GSEA plot2 for the selected pathways

perform_GSEA2 <- function(res, ref, pvalueCutoff = 1) {
  ranking <- function(res) {
    df <- res$avg_log2FC
    names(df) <- rownames(res)
    df <- sort(df, decreasing = TRUE)
    return(df)
  }
  
  ranked.res <- ranking(res)
  
  # Initialize .Random.seed object
  set.seed(NULL)
  
  x <- clusterProfiler::GSEA(geneList = ranked.res,
                             TERM2GENE = ref,
                             pvalueCutoff = pvalueCutoff,
                             pAdjustMethod = "BH",
                             verbose = TRUE,
                             seed = TRUE)
  
  
  return(x)
}

# Application 
gsea.res.raw = perform_GSEA2(res = deg2, ref = hallmark) 

HALLMARK_G2M_CHECKPOINT

id =gsea.res.raw@result$ID[grepl("G2M",gsea.res.raw@result$ID)]

# Draw the gseasplot2 
# enrichplot::gseaplot2(gsea.res.raw, geneSetID = id, title = id)
df =enrichplot::gseaplot2(gsea.res.raw, geneSetID = id, title = id)
print(df)

Top 30 leading genes

# Leading genes N
n= 30
# df[[2]][1]$dat %>% as.data.frame() %>% filter(position ==1) %>% head(n) %>% dplyr::select(gene) %>% pull()

df2 = df[[2]][1]$dat %>% as.data.frame() %>% filter(position ==1) %>% head(n) %>% 
  dplyr::select(gene,geneList) 
colnames(df2)[2] = "log2FC"

df2 %>% DT::datatable()

Comparison 3

  • P30342_M vs P30342_A
  • M is control

Volcano plot

deg = deg.two.groups(obj.srt = obj.tmp,
                    treatment = "P30342_A", 
                    control = "P30342_M")
deg3 =deg 
t= paste0(paste0("P30342_", "A"), "/",paste0("P30342_", "M"))
deg %>% ggplot(aes(avg_log2FC, -log10(p_val))) + 
  geom_point(size=0.1) +
  geom_vline(xintercept = 0, size=0.1) +
  theme_bw() + ggtitle(t)

Volcano plot with DEG information

deg = deg %>% mutate(DE= ifelse(p_val <= 0.05 & avg_log2FC >= log2(1.2), "UP",
                                ifelse(p_val <= 0.05 & avg_log2FC <= -log2(1.2), "DN",
                                "no significant")))
deg$DE = factor(deg$DE, levels = c("UP","DN","no significant"))
up = nrow(deg[deg$DE == "UP", ])
dn = nrow(deg[deg$DE == "DN", ])
deg %>% ggplot(aes(avg_log2FC, -log10(p_val), color=DE)) + 
  geom_point(size=0.5, shape=19, alpha=0.7) +
  geom_vline(xintercept = c(-log2(1.2), log2(1.2)), size=0.1, color="grey88") +
  geom_hline(yintercept = -log10(0.05), size=0.1) +
  scale_color_manual(values = c("red3","royalblue3","grey"), guide = FALSE) +
  theme_bw() +
  annotate("text", x = Inf, y = Inf, label = paste0("UP: ", up), 
           hjust = 1.1, vjust = 2, size = 5, color = "red") +
  annotate("text", x = -Inf, y = Inf, label = paste0("DN: ", dn), 
           hjust = -0.1, vjust = 2, size = 5, color = "royalblue") +
  theme_bw() + ggtitle(t)

significance : p value <= 0.05
FC >= 1.2

DEG table

deg %>% DT::datatable(extensions = "Buttons", options = list(dom="Bfrtip", buttons=c("csv","excel"), pageLength=10))
gsea.res = perform_GSEA(res = deg, ref = hallmark, pvalueCutoff = 1)

GSEA plot colored by p-value

gsea_nes_plot(gsea.res = gsea.res, title = t, color = "pvalue") 

GSEA plot colored by significance

gsea_nes_plot(gsea.res = gsea.res, title = t, color = "sig")

GSEA plot2 for the selected pathways

perform_GSEA2 <- function(res, ref, pvalueCutoff = 1) {
  ranking <- function(res) {
    df <- res$avg_log2FC
    names(df) <- rownames(res)
    df <- sort(df, decreasing = TRUE)
    return(df)
  }
  
  ranked.res <- ranking(res)
  
  # Initialize .Random.seed object
  set.seed(NULL)
  
  x <- clusterProfiler::GSEA(geneList = ranked.res,
                             TERM2GENE = ref,
                             pvalueCutoff = pvalueCutoff,
                             pAdjustMethod = "BH",
                             verbose = TRUE,
                             seed = TRUE)
  
  
  return(x)
}

# Application 
gsea.res.raw = perform_GSEA2(res = deg3, ref = hallmark) 

HALLMARK_KRAS_SIGNALING_DN

id =gsea.res.raw@result$ID[grepl("HALLMARK_KRAS_SIGNALING_DN",gsea.res.raw@result$ID)]

# Draw the gseasplot2 
# enrichplot::gseaplot2(gsea.res.raw, geneSetID = id, title = id)
df =enrichplot::gseaplot2(gsea.res.raw, geneSetID = id, title = id)
print(df)

Top 30 leading genes

# Leading genes N
n= 30
# df[[2]][1]$dat %>% as.data.frame() %>% filter(position ==1) %>% head(n) %>% dplyr::select(gene) %>% pull()

df2 = df[[2]][1]$dat %>% as.data.frame() %>% filter(position ==1) %>% head(n) %>% 
  dplyr::select(gene,geneList) 
colnames(df2)[2] = "log2FC"

df2 %>% DT::datatable()

HALLMARK_INTERFERON_GAMMA_RESPONSE

id =gsea.res.raw@result$ID[grepl("GAM",gsea.res.raw@result$ID)]

# Draw the gseasplot2 
# enrichplot::gseaplot2(gsea.res.raw, geneSetID = id, title = id)
df =enrichplot::gseaplot2(gsea.res.raw, geneSetID = id, title = id)
print(df)

Top 30 leading genes

# Leading genes N
n= 30
# df[[2]][1]$dat %>% as.data.frame() %>% filter(position ==1) %>% head(n) %>% dplyr::select(gene) %>% pull()

df2 = df[[2]][1]$dat %>% as.data.frame() %>% filter(position ==1) %>% head(n) %>% 
  dplyr::select(gene,geneList) 
colnames(df2)[2] = "log2FC"

df2 %>% DT::datatable()

Comparison 4

  • P30342_D vs P30342_I
  • D is control

Volcano plot

deg = deg.two.groups(obj.srt = obj.tmp,
                    treatment = "P30342_I", 
                    control = "P30342_D")
deg4 =deg 
t= paste0(paste0("P30342_", "I"), "/",paste0("P30342_", "D"))
deg %>% ggplot(aes(avg_log2FC, -log10(p_val))) + 
  geom_point(size=0.1) +
  geom_vline(xintercept = 0, size=0.1) +
  theme_bw() + ggtitle(t)

Volcano plot with DEG information

deg = deg %>% mutate(DE= ifelse(p_val <= 0.05 & avg_log2FC >= log2(1.2), "UP",
                                ifelse(p_val <= 0.05 & avg_log2FC <= -log2(1.2), "DN",
                                "no significant")))
deg$DE = factor(deg$DE, levels = c("UP","DN","no significant"))
up = nrow(deg[deg$DE == "UP", ])
dn = nrow(deg[deg$DE == "DN", ])
deg %>% ggplot(aes(avg_log2FC, -log10(p_val), color=DE)) + 
  geom_point(size=0.5, shape=19, alpha=0.7) +
  geom_vline(xintercept = c(-log2(1.2), log2(1.2)), size=0.1, color="grey88") +
  geom_hline(yintercept = -log10(0.05), size=0.1) +
  scale_color_manual(values = c("red3","royalblue3","grey"), guide = FALSE) +
  theme_bw() +
  annotate("text", x = Inf, y = Inf, label = paste0("UP: ", up), 
           hjust = 1.1, vjust = 2, size = 5, color = "red") +
  annotate("text", x = -Inf, y = Inf, label = paste0("DN: ", dn), 
           hjust = -0.1, vjust = 2, size = 5, color = "royalblue") +
  theme_bw() + ggtitle(t)

significance : p value <= 0.05
FC >= 1.2

DEG table

deg %>% DT::datatable(extensions = "Buttons", options = list(dom="Bfrtip", buttons=c("csv","excel"), pageLength=10))
gsea.res = perform_GSEA(res = deg, ref = hallmark, pvalueCutoff = 1)

GSEA plot colored by p-value

gsea_nes_plot(gsea.res = gsea.res, title = t, color = "pvalue") 

GSEA plot colored by significance

gsea_nes_plot(gsea.res = gsea.res, title = t, color = "sig")

### GSEA plot2 for the selected pathways

perform_GSEA2 <- function(res, ref, pvalueCutoff = 1) {
  ranking <- function(res) {
    df <- res$avg_log2FC
    names(df) <- rownames(res)
    df <- sort(df, decreasing = TRUE)
    return(df)
  }
  
  ranked.res <- ranking(res)
  
  # Initialize .Random.seed object
  set.seed(NULL)
  
  x <- clusterProfiler::GSEA(geneList = ranked.res,
                             TERM2GENE = ref,
                             pvalueCutoff = pvalueCutoff,
                             pAdjustMethod = "BH",
                             verbose = TRUE,
                             seed = TRUE)
  
  
  return(x)
}

# Application 
gsea.res.raw = perform_GSEA2(res = deg4, ref = hallmark) 

HALLMARK_INFLAMMATORY_RESPONSE

id =gsea.res.raw@result$ID[grepl("INFLAM",gsea.res.raw@result$ID)]

# Draw the gseasplot2 
# enrichplot::gseaplot2(gsea.res.raw, geneSetID = id, title = id)
df =enrichplot::gseaplot2(gsea.res.raw, geneSetID = id, title = id)
print(df)

Top 30 leading genes

# Leading genes N
n= 30
# df[[2]][1]$dat %>% as.data.frame() %>% filter(position ==1) %>% head(n) %>% dplyr::select(gene) %>% pull()

df2 = df[[2]][1]$dat %>% as.data.frame() %>% filter(position ==1) %>% head(n) %>% 
  dplyr::select(gene,geneList) 
colnames(df2)[2] = "log2FC"

df2 %>% DT::datatable()

Comparison 5

  • P30342_D vs P30342_A
  • D is control

Volcano plot

deg = deg.two.groups(obj.srt = obj.tmp,
                    treatment = "P30342_A", 
                    control = "P30342_D")
deg5 =deg 
t= paste0(paste0("P30342_", "A"), "/",paste0("P30342_", "D"))
deg %>% ggplot(aes(avg_log2FC, -log10(p_val))) + 
  geom_point(size=0.1) +
  geom_vline(xintercept = 0, size=0.1) +
  theme_bw() + ggtitle(t)

Volcano plot with DEG information

deg = deg %>% mutate(DE= ifelse(p_val <= 0.05 & avg_log2FC >= log2(1.2), "UP",
                                ifelse(p_val <= 0.05 & avg_log2FC <= -log2(1.2), "DN",
                                "no significant")))
deg$DE = factor(deg$DE, levels = c("UP","DN","no significant"))
up = nrow(deg[deg$DE == "UP", ])
dn = nrow(deg[deg$DE == "DN", ])
deg %>% ggplot(aes(avg_log2FC, -log10(p_val), color=DE)) + 
  geom_point(size=0.5, shape=19, alpha=0.7) +
  geom_vline(xintercept = c(-log2(1.2), log2(1.2)), size=0.1, color="grey88") +
  geom_hline(yintercept = -log10(0.05), size=0.1) +
  scale_color_manual(values = c("red3","royalblue3","grey"), guide = FALSE) +
  theme_bw() +
  annotate("text", x = Inf, y = Inf, label = paste0("UP: ", up), 
           hjust = 1.1, vjust = 2, size = 5, color = "red") +
  annotate("text", x = -Inf, y = Inf, label = paste0("DN: ", dn), 
           hjust = -0.1, vjust = 2, size = 5, color = "royalblue") +
  theme_bw() + ggtitle(t)

significance : p value <= 0.05
FC >= 1.2

DEG table

deg %>% DT::datatable(extensions = "Buttons", options = list(dom="Bfrtip", buttons=c("csv","excel"), pageLength=10))
gsea.res = perform_GSEA(res = deg, ref = hallmark, pvalueCutoff = 1)

GSEA plot colored by p-value

gsea_nes_plot(gsea.res = gsea.res, title = t, color = "pvalue") 

GSEA plot colored by significance

gsea_nes_plot(gsea.res = gsea.res, title = t, color = "sig")

GSEA plot2 for the selected pathways

perform_GSEA2 <- function(res, ref, pvalueCutoff = 1) {
  ranking <- function(res) {
    df <- res$avg_log2FC
    names(df) <- rownames(res)
    df <- sort(df, decreasing = TRUE)
    return(df)
  }
  
  ranked.res <- ranking(res)
  
  # Initialize .Random.seed object
  set.seed(NULL)
  
  x <- clusterProfiler::GSEA(geneList = ranked.res,
                             TERM2GENE = ref,
                             pvalueCutoff = pvalueCutoff,
                             pAdjustMethod = "BH",
                             verbose = TRUE,
                             seed = TRUE)
  
  
  return(x)
}

# Application 
gsea.res.raw = perform_GSEA2(res = deg5, ref = hallmark) 

HALLMARK_INFLAMMATORY_RESPONSE

id =gsea.res.raw@result$ID[grepl("INFLAM",gsea.res.raw@result$ID)]

# Draw the gseasplot2 
# enrichplot::gseaplot2(gsea.res.raw, geneSetID = id, title = id)
df =enrichplot::gseaplot2(gsea.res.raw, geneSetID = id, title = id)
print(df)

Top 30 leading genes

# Leading genes N
n= 30
# df[[2]][1]$dat %>% as.data.frame() %>% filter(position ==1) %>% head(n) %>% dplyr::select(gene) %>% pull()

df2 = df[[2]][1]$dat %>% as.data.frame() %>% filter(position ==1) %>% head(n) %>% 
  dplyr::select(gene,geneList) 
colnames(df2)[2] = "log2FC"

df2 %>% DT::datatable()

HALLMARK_TNFA_SIGNALING_VIA_NFKB

id =gsea.res.raw@result$ID[grepl("TNFA",gsea.res.raw@result$ID)]

# Draw the gseasplot2 
# enrichplot::gseaplot2(gsea.res.raw, geneSetID = id, title = id)
df =enrichplot::gseaplot2(gsea.res.raw, geneSetID = id, title = id)
print(df)

Top 30 leading genes

# Leading genes N
n= 30
# df[[2]][1]$dat %>% as.data.frame() %>% filter(position ==1) %>% head(n) %>% dplyr::select(gene) %>% pull()

df2 = df[[2]][1]$dat %>% as.data.frame() %>% filter(position ==1) %>% head(n) %>% 
  dplyr::select(gene,geneList) 
colnames(df2)[2] = "log2FC"

df2 %>% DT::datatable()

HALLMARK_INTERFERON_ALPHA_RESPONSE

id =gsea.res.raw@result$ID[grepl("ALPHA",gsea.res.raw@result$ID)]

# Draw the gseasplot2 
# enrichplot::gseaplot2(gsea.res.raw, geneSetID = id, title = id)
df =enrichplot::gseaplot2(gsea.res.raw, geneSetID = id, title = id)
print(df)

Top 30 leading genes

# Leading genes N
n= 30
# df[[2]][1]$dat %>% as.data.frame() %>% filter(position ==1) %>% head(n) %>% dplyr::select(gene) %>% pull()

df2 = df[[2]][1]$dat %>% as.data.frame() %>% filter(position ==1) %>% head(n) %>% 
  dplyr::select(gene,geneList) 
colnames(df2)[2] = "log2FC"

df2 %>% DT::datatable()

HALLMARK_INTERFERON_GAMMA_RESPONSE

id =gsea.res.raw@result$ID[grepl("GAMM",gsea.res.raw@result$ID)]

# Draw the gseasplot2 
# enrichplot::gseaplot2(gsea.res.raw, geneSetID = id, title = id)
df =enrichplot::gseaplot2(gsea.res.raw, geneSetID = id, title = id)
print(df)

Top 30 leading genes

# Leading genes N
n= 30
# df[[2]][1]$dat %>% as.data.frame() %>% filter(position ==1) %>% head(n) %>% dplyr::select(gene) %>% pull()

df2 = df[[2]][1]$dat %>% as.data.frame() %>% filter(position ==1) %>% head(n) %>% 
  dplyr::select(gene,geneList) 
colnames(df2)[2] = "log2FC"

df2 %>% DT::datatable()

HALLMARK_IL6_JAK_STAT3_SIGNALING

id =gsea.res.raw@result$ID[grepl("IL6",gsea.res.raw@result$ID)]

# Draw the gseasplot2 
# enrichplot::gseaplot2(gsea.res.raw, geneSetID = id, title = id)
df =enrichplot::gseaplot2(gsea.res.raw, geneSetID = id, title = id)
print(df)

Top 30 leading genes

# Leading genes N
n= 30
# df[[2]][1]$dat %>% as.data.frame() %>% filter(position ==1) %>% head(n) %>% dplyr::select(gene) %>% pull()

df2 = df[[2]][1]$dat %>% as.data.frame() %>% filter(position ==1) %>% head(n) %>% 
  dplyr::select(gene,geneList) 
colnames(df2)[2] = "log2FC"

df2 %>% DT::datatable()

Comparison 6

  • P30342_I vs P30342_A
  • I is control

Volcano plot

deg = deg.two.groups(obj.srt = obj.tmp,
                    treatment = "P30342_A", 
                    control = "P30342_I")
deg6 =deg 
t= paste0(paste0("P30342_", "A"), "/",paste0("P30342_", "I"))
deg %>% ggplot(aes(avg_log2FC, -log10(p_val))) + 
  geom_point(size=0.1) +
  geom_vline(xintercept = 0, size=0.1) +
  theme_bw() + ggtitle(t)

Volcano plot with DEG information

deg = deg %>% mutate(DE= ifelse(p_val <= 0.05 & avg_log2FC >= log2(1.2), "UP",
                                ifelse(p_val <= 0.05 & avg_log2FC <= -log2(1.2), "DN",
                                "no significant")))
deg$DE = factor(deg$DE, levels = c("UP","DN","no significant"))
up = nrow(deg[deg$DE == "UP", ])
dn = nrow(deg[deg$DE == "DN", ])
deg %>% ggplot(aes(avg_log2FC, -log10(p_val), color=DE)) + 
  geom_point(size=0.5, shape=19, alpha=0.7) +
  geom_vline(xintercept = c(-log2(1.2), log2(1.2)), size=0.1, color="grey88") +
  geom_hline(yintercept = -log10(0.05), size=0.1) +
  scale_color_manual(values = c("red3","royalblue3","grey"), guide = FALSE) +
  theme_bw() +
  annotate("text", x = Inf, y = Inf, label = paste0("UP: ", up), 
           hjust = 1.1, vjust = 2, size = 5, color = "red") +
  annotate("text", x = -Inf, y = Inf, label = paste0("DN: ", dn), 
           hjust = -0.1, vjust = 2, size = 5, color = "royalblue") +
  theme_bw() + ggtitle(t)

significance : p value <= 0.05
FC >= 1.2

DEG table

deg %>% DT::datatable(extensions = "Buttons", options = list(dom="Bfrtip", buttons=c("csv","excel"), pageLength=10))
gsea.res = perform_GSEA(res = deg, ref = hallmark, pvalueCutoff = 1)

GSEA plot colored by p-value

gsea_nes_plot(gsea.res = gsea.res, title = t, color = "pvalue") 

GSEA plot colored by significance

gsea_nes_plot <- function(gsea.res, title, color="pvalue") {
  gsea.res = gsea.res %>% mutate(sig=ifelse(pvalue <= 0.05, "p value <= 0.05", "p value > 0.05"))
  
  # 기본 플롯
  p <- gsea.res %>%
    ggplot(aes(reorder(ID, NES), NES)) +
    geom_col(aes(fill=!!sym(color)), color="grey5", size=0.15, alpha=0.8) +
    coord_flip() +
    labs(x="Pathway", y="Normalized Enrichment Score", title="GSEA") +
    theme_classic() +
    theme(axis.text.x = element_text(size=5, face = 'bold'),
          axis.text.y = element_text(size=6, face = 'bold'),
          axis.title = element_text(size=10)) +
    ggtitle(title)
  
  # color에 따른 색상 설정
  if (color == "pvalue") {
    p <- p + scale_fill_gradient(low = 'orangered', high = '#E5E7E9')
  } else if (color == "sig") {
    # sig 값이 "p value <= 0.05"가 없으면 기본 색상을 "#E5E7E9"로 설정
    if (!any(gsea.res$sig == "p value <= 0.05")) {
      p <- p + scale_fill_manual(values = c("#E5E7E9"))
    } else {
      p <- p + scale_fill_manual(values = c("orangered", "#E5E7E9"))
    }
  }
  return(p)
}
gsea_nes_plot(gsea.res = gsea.res, title = t, color = "sig")